library(dplyr)
library(tidyr)
library(here)
library(readr)
library(ggplot2)
library(googlesheets4)
library(vegan)
library(readxl)
#Analysis
gURL <- "https://docs.google.com/spreadsheets/d/1GKWzD0q683oH3I_i5ueulbf4P4Q-9-a9YPbdyUZyF2A/edit#gid=847966271"
Meta <- read_sheet(gURL, sheet = "Sample_data_corr") %>%
select(Sample_names, Sites, Sampler, duration, Type)
✔ Reading from sampler_comp_meta.
✔ Range ''Sample_data_corr''.
Plates <- read_sheet(gURL, sheet = "Plates", range = "A26:M44", col_types = "c") %>%
dplyr::rename(row = 1) %>%
filter(row %in% LETTERS[1:8]) %>%
mutate(Plate = rep(c("Plate_1", "Plate_2"), each = 8)) %>%
pivot_longer(!one_of(c("row", "Plate")), names_to = "col", values_to = "Sample_names") %>%
select(Plate, row, col, Sample_names)
✔ Reading from sampler_comp_meta.
✔ Range ''Plates'!A26:M44'.
Plates <-
Plates %>%
filter(!is.na(Sample_names)) %>%
left_join(Meta) %>%
mutate(Type = case_when(grepl("PCR", Sample_names) ~ "PCR_control",
TRUE ~ Type))
Joining with `by = join_by(Sample_names)`
shini_barcodes <-
read_xlsx(here("Documents", "Shini_barcodes_4_plates.xlsx")) %>%
mutate(Plate = rep(rep(paste("Plate", 1:4, sep = "_"), each = 96),2)) %>%
filter(Plate %in% c("Plate_1", "Plate_2")) %>%
dplyr::rename(Index = 1) %>%
select(Index, Plate) %>%
filter(grepl("_i5$", Index)) %>%
mutate(Index = gsub("(.+?)_i5", "\\1", Index)) %>%
group_by(Plate) %>%
mutate(row = rep(LETTERS[1:8], each = 12)) %>%
mutate(col = as.character(rep(1:12, 8)))
ASV_sp <- read_tsv(here("Data", "iSeq", "ITS_ASW_glom.txt"))
Rows: 169 Columns: 399── Column specification ────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): Sample
dbl (398): seq_0001, seq_0003, seq_0004, seq_0009, seq_0010, seq_0011, seq_0013, seq_0016, seq_0018,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ITS_taxa_80_glom <- read_tsv(here("Data","iSeq", "ITS_taxa_80_glom.txt"))
Rows: 398 Columns: 8── Column specification ────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (8): seq, kingdom, phylum, class, order, family, genus, species
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DNAC <- read_tsv(here("Data", "DNA_conc_air_sampler_comp.txt"))
Rows: 304 Columns: 17── Column specification ────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (12): Sample_names, Sites, Sampler, duration, Type, Tube, Material, Site_Code, Sampler_Code, Co...
dbl (4): OD, Buffer_vol, Date_Code, DNAC
dttm (1): Dates
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
clean
Fungi_ASV <-
ITS_taxa_80_glom %>%
filter(phylum %in% c("Ascomycota", "Basidiomycota")) %>%
pull(seq)
ASV_sp <- ASV_sp[,colnames(ASV_sp) %in% c("Sample", Fungi_ASV)]
Meta <-
Plates %>%
left_join(shini_barcodes)
Joining with `by = join_by(Plate, row, col)`
flow_rate <-
Meta %>%
filter(Type == "active" & Sampler != "Drone") %>%
select(Sampler, duration) %>%
distinct() %>%
mutate(flow = case_when(
Sampler == "Hepa" ~ 60,
Sampler == "Kärcher" ~ 3000,
Sampler == "Sass" ~ 300,
Sampler == "Coriolis" ~ 300,
Sampler == "Electrostatic" ~ 10,
Sampler == "Burkhart" ~ 16.5,
)) %>%
mutate(air_vol = case_when(
duration == "30 min" ~ flow*30,
duration == "5 hours" ~ flow*5*60
))
ASV_tax_long <-
ASV_sp %>%
pivot_longer(-Sample, names_to = "seq", values_to = "reads") %>%
dplyr::rename(Index = Sample) %>%
left_join(Meta) %>%
left_join(ITS_taxa_80_glom)
Joining with `by = join_by(Index)`Joining with `by = join_by(seq)`
ASV_tax_long %>%
filter(grepl("control", Type)) %>%
filter(reads > 0) %>%
group_by(Type) %>%
summarise(reads = sum(reads))
NA
ASV_tax_long %>%
mutate(reads = ifelse(reads == 0, NA, reads)) %>%
arrange(Type, Sites, duration) %>%
# filter(Sampler == "MWAC") %>%
mutate(Sample_names = factor(Sample_names, levels = unique(.$Sample_names))) %>%
ggplot(aes(y = species, x = Sample_names, size = reads, colour = Type))+
geom_point()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
NA
rar_curve <-
ASV_sp %>%
select(-Sample) %>%
as.matrix %>%
`rownames<-`(ASV_sp$Sample) %>%
`[`(rowSums(.) > 0,) %>%
rarecurve(x = .,step = 100, tidy = TRUE)
rar_curve %>% left_join(Meta, by = c("Site" = "Index")) %>%
filter(grepl("active|passive", Type)) %>%
ggplot(aes(x = Sample, y = Species, group = Site, colour = duration)) +
geom_line()+
facet_wrap(~Sampler)
rar <-
ASV_sp %>%
select(-Sample) %>%
as.matrix %>%
`rownames<-`(ASV_sp$Sample) %>%
`[`(rowSums(.) > 0,) %>%
rarefy(x = .,sample = 2000)
Warning: requested 'sample' was larger than smallest site maximum (1)
S_df <-
rar %>%
data.frame(S = .) %>%
dplyr::add_rownames(var = "Index") %>%
left_join(Meta) %>%
filter(Type %in% c("active", "passive")) %>%
filter(Sampler != "Drone") %>%
mutate(Sampler = factor(Sampler, levels= unique(.$Sampler)[c(8,1,3,2,5,6,4,7)]))
Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
Please use `tibble::rownames_to_column()` instead.Joining with `by = join_by(Index)`
S_df %>%
ggplot(aes(y = S, x = duration, colour = Sites))+
facet_grid(~Sampler, scales = "free_x")+
geom_line(aes(group = Sites), size = 0.2, colour = "grey")+
geom_point()+
theme_minimal()+
scale_color_brewer(palette = "Set1")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.position = "bottom")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
S_df %>%
left_join(flow_rate) %>%
filter(Type == "active" & Sampler != "Drone") %>%
ggplot(aes(x = air_vol, y = S, colour = Sampler))+
geom_line(aes(group = Sites), size = 0.2, colour = "grey")+
geom_point()+
facet_grid(~duration)+
scale_x_log10()+
scale_color_brewer(palette = "Set1")+
theme_bw()
Joining with `by = join_by(Sampler, duration)`
NA
NA
#Agaricomycetes
ASV_tax_long %>%
filter(class == "Agaricomycetes") %>%
#filter(phylum == "Basidiomycota") %>%
filter(!is.na(species)) %>%
filter(Type %in% c("active", "passive")) %>%
filter(duration != "30 min") %>%
filter(Sampler != "Drone") %>%
mutate(Sampler = factor(Sampler, levels= unique(.$Sampler)[c(8,1,3,2,5,6,4,7)])) %>%
mutate(reads = ifelse(reads == 0, NA, reads)) %>%
arrange(Type, Sites, duration) %>%
# filter(Sampler == "MWAC") %>%
mutate(Sample_names = factor(Sample_names, levels = unique(.$Sample_names))) %>%
ggplot(aes(y = species, x = Sampler, size = reads, colour = duration))+
geom_point(alpha = 0.6)+
facet_grid(~Sites, scales = "free_x")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))+
scale_color_brewer(palette = "Set1")+
ggtitle("Species composition by site - Agaricomycetes")
NA
ASV_agri <- ITS_taxa_80_glom %>%
filter(class == "Agaricomycetes") %>%
pull(seq)
rar_agri <-
ASV_sp %>%
select(all_of(ASV_agri)) %>%
as.matrix %>%
`rownames<-`(ASV_sp$Sample) %>%
`[`(rowSums(.) > 0,) %>%
rarefy(x = .,sample = 2000)
Warning: requested 'sample' was larger than smallest site maximum (1)
S_df_agri <-
rar_agri %>%
data.frame(S = .) %>%
dplyr::add_rownames(var = "Index") %>%
left_join(Meta) %>%
filter(Type %in% c("active", "passive")) %>%
filter(Sampler != "Drone") %>%
mutate(Sampler = factor(Sampler, levels= unique(.$Sampler)[c(8,1,3,2,5,6,4,7)]))
Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
Please use `tibble::rownames_to_column()` instead.Joining with `by = join_by(Index)`
S_df_agri %>%
ggplot(aes(y = S, x = duration, colour = Sites))+
facet_grid(~Sampler, scales = "free_x")+
geom_line(aes(group = Sites), size = 0.2, colour = "grey")+
geom_point()+
theme_minimal()+
scale_color_brewer(palette = "Set1")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
legend.position = "bottom")+
ggtitle("rarefied richness - Agaricomycetes")
S_df_agri %>%
left_join(flow_rate) %>%
filter(Type == "active" & Sampler != "Drone") %>%
ggplot(aes(x = air_vol, y = S, colour = Sampler))+
# geom_line(aes(group = Sites), size = 0.2, colour = "grey")+
geom_point(position = position_jitter(width = 0.1))+
geom_smooth(aes(group = 1), method = "lm", se = TRUE, colour = "black", size = 0.2)+
facet_grid(~duration)+
scale_x_log10()+
scale_color_brewer(palette = "Set1")+
theme_minimal()+
theme(legend.position = "bottom")+
labs(x = "sampled air volume (l)", title = "sampled air volume")
Joining with `by = join_by(Sampler, duration)`
NA
NA
S_df_agri %>%
left_join(flow_rate) %>%
left_join(DNAC) %>%
mutate(yield = DNAC * 200) %>%
filter(Type %in% c("active", "passive") & Sampler != "Drone") %>%
ggplot(aes(x = yield, y = S, colour = Sampler))+
# geom_line(aes(group = Sites), size = 0.2, colour = "grey")+
geom_point(position = position_jitter(width = 0.1))+
geom_smooth(aes(group = 1), method = "lm", se = TRUE, colour = "black", size = 0.2)+
facet_grid(~duration)+
scale_x_log10()+
scale_color_brewer(palette = "Set1")+
theme_minimal()+
theme(legend.position = "bottom")+
labs(x = "DNA yield (ng)", title = "DNA yield")
Joining with `by = join_by(Sampler, duration)`Joining with `by = join_by(Plate, Sample_names, Sites, Sampler, duration, Type)`